do not operate on a repository that has an empty name.
* move: Fix openFile crash with -J
(Fixes a reversion in 8.20201103)
+ * S3: Speed up importing from a large bucket when fileprefix= is set
+ by only asking for files under the prefix.
-- Joey Hess <id@joeyh.name> Mon, 03 Oct 2022 13:36:42 -0400
, renameExport = renameExportS3 hdl this rs info
}
, importActions = ImportActions
- { listImportableContents = listImportableContentsS3 hdl this info
+ { listImportableContents = listImportableContentsS3 hdl this info c
, importKey = Nothing
, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierS3 hdl this rs info
, storeExportWithContentIdentifier = storeExportWithContentIdentifierS3 hdl this rs info magic
srcobject = T.pack $ bucketExportLocation info src
dstobject = T.pack $ bucketExportLocation info dest
-listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
-listImportableContentsS3 hv r info =
+listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> ParsedRemoteConfig -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
+listImportableContentsS3 hv r info c =
withS3Handle hv $ \case
Nothing -> giveup $ needS3Creds (uuid r)
Just h -> Just <$> go h
ic <- liftIO $ runResourceT $ extractFromResourceT =<< startlist h
return (ImportableContentsComplete ic)
+ fileprefix = T.pack <$> getRemoteConfigValue fileprefixField c
+
startlist h
| versioning info = do
rsp <- sendS3Handle h $
continuelistversioned h [] rsp
| otherwise = do
rsp <- sendS3Handle h $
- S3.getBucket (bucket info)
+ (S3.getBucket (bucket info))
+ { S3.gbPrefix = fileprefix }
continuelistunversioned h [] rsp
continuelistunversioned h l rsp
rsp' <- sendS3Handle h $
(S3.getBucket (bucket info))
{ S3.gbMarker = S3.gbrNextMarker rsp
+ , S3.gbPrefix = fileprefix
}
continuelistunversioned h (rsp:l) rsp'
| otherwise = return $
(S3.getBucketObjectVersions (bucket info))
{ S3.gbovKeyMarker = S3.gbovrNextKeyMarker rsp
, S3.gbovVersionIdMarker = S3.gbovrNextVersionIdMarker rsp
+ , S3.gbovPrefix = fileprefix
}
continuelistversioned h (rsp:l) rsp'
| otherwise = return $
--- /dev/null
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 8"""
+ date="2022-10-10T21:04:49Z"
+ content="""
+I've finished the work on aws, which is in
+<https://github.com/aristidb/aws/pull/281> and I hope will be merged soon.
+
+git-annex now has a branch `anons3` that implements this when
+the S3 remote is configured with `signature=anonymous`.
+
+ $ git-annex initremote s3-origin type=S3 importtree=yes encryption=none bucket=dandiarchive fileprefix=zarr-checksums/2ac71edb-738c-40ac-bd8c-8ca985adaa12/ signature=anonymous
+ initremote s3-origin (checking bucket...) ok
+ (recording state in git...)
+ $ git-annex import master --from s3-origin
+ list s3-origin ok
+ import s3-origin .checksum
+ ok
+ import s3-origin 0/.checksum
+ ok
+ import s3-origin 0/0/.checksum
+ ok
+ ^C
+
+Also, I've fixed it to only list files under the fileprefix, which
+sped up the listing a *lot* in this bucket, which has many other files.
+"""]]